In [5]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

Grid Searches


In [6]:
from sklearn.datasets import load_iris

iris = load_iris()
X, y = iris.data, iris.target

In [7]:
from sklearn.svm import SVC

Cs = [0.001, 0.01, 0.1, 1, 10]
gammas = [0.001, 0.01, 0.1, 1]

Manual Variant


In [8]:
from sklearn.cross_validation import cross_val_score

for C in Cs:
    for gamma in gammas:
        scores = cross_val_score(SVC(C=C, gamma=gamma), X, y)
        print("C: %f, gamma: %f, average score: %f" % (C, gamma, np.mean(scores)))


C: 0.001000, gamma: 0.001000, average score: 0.913807
C: 0.001000, gamma: 0.010000, average score: 0.913807
C: 0.001000, gamma: 0.100000, average score: 0.919935
C: 0.001000, gamma: 1.000000, average score: 0.953840
C: 0.010000, gamma: 0.001000, average score: 0.913807
C: 0.010000, gamma: 0.010000, average score: 0.913807
C: 0.010000, gamma: 0.100000, average score: 0.919935
C: 0.010000, gamma: 1.000000, average score: 0.953840
C: 0.100000, gamma: 0.001000, average score: 0.913807
C: 0.100000, gamma: 0.010000, average score: 0.913807
C: 0.100000, gamma: 0.100000, average score: 0.926879
C: 0.100000, gamma: 1.000000, average score: 0.966912
C: 1.000000, gamma: 0.001000, average score: 0.913807
C: 1.000000, gamma: 0.010000, average score: 0.947304
C: 1.000000, gamma: 0.100000, average score: 0.973448
C: 1.000000, gamma: 1.000000, average score: 0.973856
C: 10.000000, gamma: 0.001000, average score: 0.947304
C: 10.000000, gamma: 0.010000, average score: 0.973448
C: 10.000000, gamma: 0.100000, average score: 0.973856
C: 10.000000, gamma: 1.000000, average score: 0.973856

GridSearchCV


In [9]:
from sklearn.grid_search import GridSearchCV

param_grid = {'C': Cs, 'gamma' : gammas}

print(param_grid)


{'C': [0.001, 0.01, 0.1, 1, 10], 'gamma': [0.001, 0.01, 0.1, 1]}

In [10]:
grid_search = GridSearchCV(SVC(), param_grid, verbose=3, cv=5)

In [11]:
grid_search.fit(X, y)


Fitting 5 folds for each of 20 candidates, totalling 100 fits
[CV] C=0.001, gamma=0.001 ............................................
[CV] ................... C=0.001, gamma=0.001, score=0.866667 -   0.0s
[CV] C=0.001, gamma=0.001 ............................................
[CV] ................... C=0.001, gamma=0.001, score=0.966667 -   0.0s
[CV] C=0.001, gamma=0.001 ............................................
[CV] ................... C=0.001, gamma=0.001, score=0.833333 -   0.0s
[CV] C=0.001, gamma=0.001 ............................................
[CV] ................... C=0.001, gamma=0.001, score=0.966667 -   0.0s
[CV] C=0.001, gamma=0.001 ............................................
[CV] ................... C=0.001, gamma=0.001, score=0.933333 -   0.0s
[CV] C=0.001, gamma=0.01 .............................................
[CV] .................... C=0.001, gamma=0.01, score=0.866667 -   0.0s
[CV] C=0.001, gamma=0.01 .............................................
[CV] .................... C=0.001, gamma=0.01, score=0.966667 -   0.0s
[CV] C=0.001, gamma=0.01 .............................................
[CV] .................... C=0.001, gamma=0.01, score=0.833333 -   0.0s
[CV] C=0.001, gamma=0.01 .............................................
[CV] .................... C=0.001, gamma=0.01, score=0.966667 -   0.0s
[CV] C=0.001, gamma=0.01 .............................................
[CV] .................... C=0.001, gamma=0.01, score=0.933333 -   0.0s
[CV] C=0.001, gamma=0.1 ..............................................
[CV] ..................... C=0.001, gamma=0.1, score=0.900000 -   0.0s
[CV] C=0.001, gamma=0.1 ..............................................
[CV] ..................... C=0.001, gamma=0.1, score=0.933333 -   0.0s
[CV] C=0.001, gamma=0.1 ..............................................
[CV] ..................... C=0.001, gamma=0.1, score=0.866667 -   0.0s
[CV] C=0.001, gamma=0.1 ..............................................
[CV] ..................... C=0.001, gamma=0.1, score=0.933333 -   0.0s
[CV] C=0.001, gamma=0.1 ..............................................
[CV] ..................... C=0.001, gamma=0.1, score=0.933333 -   0.0s
[CV] C=0.001, gamma=1 ................................................
[CV] ....................... C=0.001, gamma=1, score=0.933333 -   0.0s
[CV] C=0.001, gamma=1 ................................................
[CV] ....................... C=0.001, gamma=1, score=0.966667 -   0.0s
[CV] C=0.001, gamma=1 ................................................
[CV] ....................... C=0.001, gamma=1, score=0.966667 -   0.0s
[CV] C=0.001, gamma=1 ................................................
[CV] ....................... C=0.001, gamma=1, score=0.933333 -   0.0s
[CV] C=0.001, gamma=1 ................................................
[CV] ....................... C=0.001, gamma=1, score=1.000000 -   0.0s
[Parallel(n_jobs=1)]: Done   1 jobs       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done  32 jobs       | elapsed:    0.0s
[CV] C=0.01, gamma=0.001 .............................................
[CV] .................... C=0.01, gamma=0.001, score=0.866667 -   0.0s
[CV] C=0.01, gamma=0.001 .............................................
[CV] .................... C=0.01, gamma=0.001, score=0.966667 -   0.0s
[CV] C=0.01, gamma=0.001 .............................................
[CV] .................... C=0.01, gamma=0.001, score=0.833333 -   0.0s
[CV] C=0.01, gamma=0.001 .............................................
[CV] .................... C=0.01, gamma=0.001, score=0.966667 -   0.0s
[CV] C=0.01, gamma=0.001 .............................................
[CV] .................... C=0.01, gamma=0.001, score=0.933333 -   0.0s
[CV] C=0.01, gamma=0.01 ..............................................
[CV] ..................... C=0.01, gamma=0.01, score=0.866667 -   0.0s
[CV] C=0.01, gamma=0.01 ..............................................
[CV] ..................... C=0.01, gamma=0.01, score=0.966667 -   0.0s
[CV] C=0.01, gamma=0.01 ..............................................
[CV] ..................... C=0.01, gamma=0.01, score=0.833333 -   0.0s
[CV] C=0.01, gamma=0.01 ..............................................
[CV] ..................... C=0.01, gamma=0.01, score=0.966667 -   0.0s
[CV] C=0.01, gamma=0.01 ..............................................
[CV] ..................... C=0.01, gamma=0.01, score=0.933333 -   0.0s
[CV] C=0.01, gamma=0.1 ...............................................
[CV] ...................... C=0.01, gamma=0.1, score=0.900000 -   0.0s
[CV] C=0.01, gamma=0.1 ...............................................
[CV] ...................... C=0.01, gamma=0.1, score=0.933333 -   0.0s
[CV] C=0.01, gamma=0.1 ...............................................
[CV] ...................... C=0.01, gamma=0.1, score=0.866667 -   0.0s
[CV] C=0.01, gamma=0.1 ...............................................
[CV] ...................... C=0.01, gamma=0.1, score=0.933333 -   0.0s
[CV] C=0.01, gamma=0.1 ...............................................
[CV] ...................... C=0.01, gamma=0.1, score=0.933333 -   0.0s
[CV] C=0.01, gamma=1 .................................................
[CV] ........................ C=0.01, gamma=1, score=0.933333 -   0.0s
[CV] C=0.01, gamma=1 .................................................
[CV] ........................ C=0.01, gamma=1, score=0.966667 -   0.0s
[CV] C=0.01, gamma=1 .................................................
[CV] ........................ C=0.01, gamma=1, score=0.966667 -   0.0s
[CV] C=0.01, gamma=1 .................................................
[CV] ........................ C=0.01, gamma=1, score=0.933333 -   0.0s
[CV] C=0.01, gamma=1 .................................................
[CV] ........................ C=0.01, gamma=1, score=1.000000 -   0.0s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] ..................... C=0.1, gamma=0.001, score=0.866667 -   0.0s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] ..................... C=0.1, gamma=0.001, score=0.966667 -   0.0s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] ..................... C=0.1, gamma=0.001, score=0.833333 -   0.0s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] ..................... C=0.1, gamma=0.001, score=0.966667 -   0.0s
[CV] C=0.1, gamma=0.001 ..............................................
[CV] ..................... C=0.1, gamma=0.001, score=0.933333 -   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...................... C=0.1, gamma=0.01, score=0.866667 -   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...................... C=0.1, gamma=0.01, score=0.966667 -   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...................... C=0.1, gamma=0.01, score=0.833333 -   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...................... C=0.1, gamma=0.01, score=0.966667 -   0.0s
[CV] C=0.1, gamma=0.01 ...............................................
[CV] ...................... C=0.1, gamma=0.01, score=0.933333 -   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....................... C=0.1, gamma=0.1, score=0.900000 -   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....................... C=0.1, gamma=0.1, score=0.933333 -   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....................... C=0.1, gamma=0.1, score=0.900000 -   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....................... C=0.1, gamma=0.1, score=0.933333 -   0.0s
[CV] C=0.1, gamma=0.1 ................................................
[CV] ....................... C=0.1, gamma=0.1, score=0.933333 -   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ......................... C=0.1, gamma=1, score=0.933333 -   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ......................... C=0.1, gamma=1, score=0.966667 -   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ......................... C=0.1, gamma=1, score=0.933333 -   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ......................... C=0.1, gamma=1, score=0.966667 -   0.0s
[CV] C=0.1, gamma=1 ..................................................
[CV] ......................... C=0.1, gamma=1, score=1.000000 -   0.0s
[CV] C=1, gamma=0.001 ................................................
[CV] ....................... C=1, gamma=0.001, score=0.866667 -   0.0s
[CV] C=1, gamma=0.001 ................................................
[CV] ....................... C=1, gamma=0.001, score=0.966667 -   0.0s
[CV] C=1, gamma=0.001 ................................................
[CV] ....................... C=1, gamma=0.001, score=0.833333 -   0.0s
[CV] C=1, gamma=0.001 ................................................
[CV] ....................... C=1, gamma=0.001, score=0.966667 -   0.0s
[CV] C=1, gamma=0.001 ................................................
[CV] ....................... C=1, gamma=0.001, score=0.933333 -   0.0s
[CV] C=1, gamma=0.01 .................................................
[CV] ........................ C=1, gamma=0.01, score=0.900000 -   0.0s
[CV] C=1, gamma=0.01 .................................................
[CV] ........................ C=1, gamma=0.01, score=0.966667 -   0.0s
[CV] C=1, gamma=0.01 .................................................
[CV] ........................ C=1, gamma=0.01, score=0.866667 -   0.0s
[CV] C=1, gamma=0.01 .................................................
[CV] ........................ C=1, gamma=0.01, score=0.933333 -   0.0s
[CV] C=1, gamma=0.01 .................................................
[CV] ........................ C=1, gamma=0.01, score=1.000000 -   0.0s
[CV] C=1, gamma=0.1 ..................................................
[CV] ......................... C=1, gamma=0.1, score=0.966667 -   0.0s
[CV] C=1, gamma=0.1 ..................................................
[CV] ......................... C=1, gamma=0.1, score=1.000000 -   0.0s
[CV] C=1, gamma=0.1 ..................................................
[CV] ......................... C=1, gamma=0.1, score=0.966667 -   0.0s
[CV] C=1, gamma=0.1 ..................................................
[CV] ......................... C=1, gamma=0.1, score=0.966667 -   0.0s
[CV] C=1, gamma=0.1 ..................................................
[CV] ......................... C=1, gamma=0.1, score=1.000000 -   0.0s
[CV] C=1, gamma=1 ....................................................
[CV] ........................... C=1, gamma=1, score=0.966667 -   0.0s
[CV] C=1, gamma=1 ....................................................
[CV] ........................... C=1, gamma=1, score=1.000000 -   0.0s
[CV] C=1, gamma=1 ....................................................
[CV] ........................... C=1, gamma=1, score=0.900000 -   0.0s
[CV] C=1, gamma=1 ....................................................
[CV] ........................... C=1, gamma=1, score=0.966667 -   0.0s
[CV] C=1, gamma=1 ....................................................
[CV] ........................... C=1, gamma=1, score=1.000000 -   0.0s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...................... C=10, gamma=0.001, score=0.900000 -   0.0s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...................... C=10, gamma=0.001, score=0.966667 -   0.0s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...................... C=10, gamma=0.001, score=0.866667 -   0.0s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...................... C=10, gamma=0.001, score=0.933333 -   0.0s
[CV] C=10, gamma=0.001 ...............................................
[CV] ...................... C=10, gamma=0.001, score=1.000000 -   0.0s
[CV] C=10, gamma=0.01 ................................................
[CV] ....................... C=10, gamma=0.01, score=0.966667 -   0.0s
[CV] C=10, gamma=0.01 ................................................
[CV] ....................... C=10, gamma=0.01, score=1.000000 -   0.0s
[CV] C=10, gamma=0.01 ................................................
[CV] ....................... C=10, gamma=0.01, score=0.966667 -   0.0s
[CV] C=10, gamma=0.01 ................................................
[CV] ....................... C=10, gamma=0.01, score=0.966667 -   0.0s
[CV] C=10, gamma=0.01 ................................................
[CV] ....................... C=10, gamma=0.01, score=1.000000 -   0.0s
[CV] C=10, gamma=0.1 .................................................
[CV] ........................ C=10, gamma=0.1, score=0.966667 -   0.0s
[CV] C=10, gamma=0.1 .................................................
[CV] ........................ C=10, gamma=0.1, score=1.000000 -   0.0s
[CV] C=10, gamma=0.1 .................................................
[CV] ........................ C=10, gamma=0.1, score=0.966667 -   0.0s
[CV] C=10, gamma=0.1 .................................................
[CV] ........................ C=10, gamma=0.1, score=0.966667 -   0.0s
[CV] C=10, gamma=0.1 .................................................
[CV] ........................ C=10, gamma=0.1, score=1.000000 -   0.0s
[CV] C=10, gamma=1 ...................................................
[CV] .......................... C=10, gamma=1, score=0.966667 -   0.0s
[CV] C=10, gamma=1 ...................................................
[CV] .......................... C=10, gamma=1, score=0.966667 -   0.0s
[CV] C=10, gamma=1 ...................................................
[CV] .......................... C=10, gamma=1, score=0.900000 -   0.0s
[CV] C=10, gamma=1 ...................................................
[CV] .......................... C=10, gamma=1, score=0.933333 -   0.0s
[CV] C=10, gamma=1 ...................................................
[CV] .......................... C=10, gamma=1, score=1.000000 -   0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished
Out[11]:
GridSearchCV(cv=5, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params={}, iid=True, loss_func=None, n_jobs=1,
       param_grid={'C': [0.001, 0.01, 0.1, 1, 10], 'gamma': [0.001, 0.01, 0.1, 1]},
       pre_dispatch='2*n_jobs', refit=True, score_func=None, scoring=None,
       verbose=3)

In [12]:
grid_search.best_params_


Out[12]:
{'C': 1, 'gamma': 0.1}

In [13]:
grid_search.best_score_


Out[13]:
0.97999999999999998

In [14]:
# We extract just the scores
scores = [x[1] for x in grid_search.grid_scores_]
scores = np.array(scores).reshape(5, 4)

plt.matshow(scores)
plt.xlabel('gamma')
plt.ylabel('C')
plt.colorbar()
plt.xticks(np.arange(4), param_grid['gamma'])
plt.yticks(np.arange(5), param_grid['C']);


Avoid Overfitting with Hyper-Parameters


In [15]:
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [16]:
grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(X_train, y_train)


Out[16]:
GridSearchCV(cv=5, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params={}, iid=True, loss_func=None, n_jobs=1,
       param_grid={'C': [0.001, 0.01, 0.1, 1, 10], 'gamma': [0.001, 0.01, 0.1, 1]},
       pre_dispatch='2*n_jobs', refit=True, score_func=None, scoring=None,
       verbose=0)

In [17]:
grid_search.predict(X_test)


Out[17]:
array([1, 0, 0, 0, 1, 0, 2, 0, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 1, 0, 2, 1, 2,
       2, 1, 0, 1, 0, 2, 0, 2, 0, 2, 1, 1, 1, 1, 1])

In [18]:
grid_search.score(X_test, y_test)


Out[18]:
0.97368421052631582

Nested Cross-Validation


In [19]:
cross_val_score(grid_search, X, y, cv=5)


Out[19]:
array([ 0.96666667,  1.        ,  0.96666667,  0.96666667,  1.        ])

In [ ]: